In [1]:
import os

from PIL import Image
import tensorflow as tf
import numpy as np
from tqdm import tqdm
import h5py
from matplotlib import pyplot as plt
%matplotlib inline

Import Images


In [2]:
#assumes image is larger than the resize
#resize using shrink and center crop
#assumes image is larger than the resize
#resize using shrink and center crop
def resize_scc(im, rs=[500, 500]):
    """Shrink `im` so both sides still cover rs, then center-crop to exactly rs.

    im: PIL Image, assumed at least rs[0] x rs[1].
    rs: target [width, height].  (Default list is never mutated.)
    Returns a PIL Image of size exactly rs.
    """
    w, h = im.size
    # Shrink by the LARGER of the two ratios so the other dimension still
    # covers the target.  The original picked the axis by the minimum
    # *absolute* margin, which for extreme aspect ratios (e.g. size (750,600)
    # -> rs (400,300)) chose the wrong axis, producing a negative crop offset
    # and a black-padded result.
    scale = max(float(rs[0]) / w, float(rs[1]) / h)
    new_w = max(int(round(w * scale)), rs[0])
    new_h = max(int(round(h * scale)), rs[1])
    im2 = im.resize((new_w, new_h), resample=Image.BILINEAR)

    # Center crop down to the exact target size.
    left = (im2.size[0] - rs[0]) // 2
    top = (im2.size[1] - rs[1]) // 2
    im3 = im2.crop(box=[left, top, left + rs[0], top + rs[1]])
    assert list(im3.size) == rs
    return im3

In [3]:
# Target [width, height] shared by the style, content, and noise images —
# all three must match so the VGG activations align spatially.
# rs = [400,320]
rs = [350,350]

In [5]:
# Style image: load, force 3 channels, resize, and convert to float32 (H, W, 3).
# PNGs are often RGBA or palette mode, which would break the (..., 3) reshape
# below — .convert('RGB') guarantees exactly three channels.
im = Image.open('images/mandelbrot.png').convert('RGB')
im = resize_scc(im,rs=rs)
style = np.array(im,dtype=np.float32).reshape((im.size[1],im.size[0],3))
im


Out[5]:

In [6]:
# Content image: same pipeline as the style image.  .convert('RGB') guards
# against grayscale/CMYK JPEGs, which would break the (..., 3) reshape.
im2 = Image.open('images/code_academy.jpg').convert('RGB')
im2 = resize_scc(im2,rs=rs)
content = np.array(im2,dtype=np.float32).reshape((im2.size[1],im2.size[0],3))
im2


Out[6]:

In [7]:
# White-noise starting point for the synthesized image: Gaussian noise with
# std sqrt(255), centered at mid-gray, clipped to the displayable 0-255 range.
wn = np.clip(255.0 / 2 + np.sqrt(255.0) * np.random.randn(im.size[1], im.size[0], 3), 0, 255)
Image.fromarray(wn.reshape(im.size[1], im.size[0], 3).astype(np.uint8))


Out[7]:

Preprocess Images


In [8]:
def pre_process(x):
    """Convert an (H, W, 3) RGB float array to VGG input: BGR minus ImageNet means.

    Returns a NEW array.  The original implementation subtracted the means
    through a reversed view, which silently mutated the caller's array in place.
    """
    x = x[:, :, ::-1].copy()  # RGB -> BGR; copy so the input stays untouched
    x[:, :, 0] -= 103.939  # B mean
    x[:, :, 1] -= 116.779  # G mean
    x[:, :, 2] -= 123.68   # R mean
    return x
def post_process(x):
    """Inverse of pre_process: add back the channel means, then BGR -> RGB.

    Returns a NEW array instead of mutating the input in place (the original
    modified the caller's array before reversing the channels).
    """
    x = x.copy()  # leave the caller's array untouched
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    return x[:, :, ::-1]

In [9]:
# Apply the VGG preprocessing and prepend a batch dimension, so each image
# becomes a (1, H, W, 3) tensor ready to feed the network.
style = np.expand_dims(pre_process(style), axis=0)
content = np.expand_dims(pre_process(content), axis=0)
wn = np.expand_dims(pre_process(wn), axis=0)

In [10]:
# Build the TF1 graph: style and content images enter as fixed constants,
# while alter_img (the white-noise image) is the only tf.Variable — it is
# what gradient descent will optimize.
g = tf.Graph()
with g.as_default():
    style_img = tf.constant(style,dtype=tf.float32)
    content_img = tf.constant(content,dtype=tf.float32)
    alter_img = tf.Variable(initial_value=wn,dtype=tf.float32)

Build VGG19

Extract weights


In [11]:
# Open the Keras VGG19 weight file read-only: older h5py versions default to
# append mode, which can create or modify the file on disk.
f = h5py.File("vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5", "r")

In [12]:
# List the layer groups stored in the weight file (Python 2 print statement).
for l in f.keys():
    print l


block1_conv1
block1_conv2
block1_pool
block2_conv1
block2_conv2
block2_pool
block3_conv1
block3_conv2
block3_conv3
block3_conv4
block3_pool
block4_conv1
block4_conv2
block4_conv3
block4_conv4
block4_pool
block5_conv1
block5_conv2
block5_conv3
block5_conv4
block5_pool
input_2

In [13]:
def parse_layer_conv(x):
    """Split a layer name like 'block3_conv2' into ('2', '3') — (conv, block)
    indices as strings.  Returns None for non-conv names (pools, inputs)."""
    if 'conv' not in x:
        return None
    conv_idx = x.split('conv')[-1]
    block_idx = x.split('block')[1].split('_')[0]
    return (conv_idx, block_idx)

In [14]:
# x = '1_1'
def getName(x):
    """Map a short '<block>_<conv>' id such as '1_1' to the h5 group name
    'block1_conv1'."""
    block_idx, conv_idx = x.split('_')
    return 'block%s_conv%s' % (block_idx, conv_idx)

In [15]:
getName('1_1')


Out[15]:
'block1_conv1'

In [16]:
def getW(f, l):
    """Return the kernel array of layer group `l` in weight file `f`.

    Relies on h5py's alphabetical group ordering putting the kernel dataset
    ('..._W...') before the bias ('..._b...') — presumably stable for this
    Keras weight file; verify if the file layout ever changes.
    list(...)[i][()] replaces the Python-2-only `.values()[i].value`:
    `.values()` is a non-indexable view on Python 3 and `Dataset.value`
    was removed in h5py 3.
    """
    return list(f[l].values())[0][()]
def getb(f, l):
    """Return the bias array of layer group `l` (second dataset in the group)."""
    return list(f[l].values())[1][()]

Add Graph


In [17]:
with g.as_default():
    def vgg19(img):
        """Build a truncated VGG19 feature extractor on `img` (1, H, W, 3 BGR).

        Weights come from the h5 file as tf.constants, so no gradients flow
        into them — only into `img` when it is a Variable.  Blocks 3-5 use
        only the first three of VGG19's four convolutions per block, and
        pooling is 3x3 average pooling with stride 1 ('SAME'), so spatial
        resolution is never reduced — a deliberate deviation from standard
        VGG19 (2x2/stride-2 max pooling); Gatys et al. recommend average
        pooling for style transfer, though they keep the stride-2
        downsampling.

        Returns [conv1_1, conv2_1, conv3_1, conv4_1, conv5_1, conv4_2]:
        the five style layers followed by the content layer.
        """
        # Set up weights as constants
        #Block 1
        w1_1 = tf.constant(getW(f,getName('1_1')))
        b1_1 = tf.constant(getb(f,getName('1_1')))
        w1_2 = tf.constant(getW(f,getName('1_2')))
        b1_2 = tf.constant(getb(f,getName('1_2')))

        #Block 2
        w2_1 = tf.constant(getW(f,getName('2_1')))
        b2_1 = tf.constant(getb(f,getName('2_1')))
        w2_2 = tf.constant(getW(f,getName('2_2')))
        b2_2 = tf.constant(getb(f,getName('2_2')))
        
        #Block 3
        w3_1 = tf.constant(getW(f,getName('3_1')))
        b3_1 = tf.constant(getb(f,getName('3_1')))
        w3_2 = tf.constant(getW(f,getName('3_2')))
        b3_2 = tf.constant(getb(f,getName('3_2')))
        w3_3 = tf.constant(getW(f,getName('3_3')))
        b3_3 = tf.constant(getb(f,getName('3_3')))
        
        #Block 4
        w4_1 = tf.constant(getW(f,getName('4_1')))
        b4_1 = tf.constant(getb(f,getName('4_1')))
        w4_2 = tf.constant(getW(f,getName('4_2')))
        b4_2 = tf.constant(getb(f,getName('4_2')))
        w4_3 = tf.constant(getW(f,getName('4_3')))
        b4_3 = tf.constant(getb(f,getName('4_3')))
        
        #Block 5
        w5_1 = tf.constant(getW(f,getName('5_1')))
        b5_1 = tf.constant(getb(f,getName('5_1')))
        w5_2 = tf.constant(getW(f,getName('5_2')))
        b5_2 = tf.constant(getb(f,getName('5_2')))
        w5_3 = tf.constant(getW(f,getName('5_3')))
        b5_3 = tf.constant(getb(f,getName('5_3')))

        #Add convolution operations (all 3x3 conv, stride 1, ReLU)
        #Block 1
        conv1_1 = tf.nn.relu(tf.nn.conv2d(img,w1_1,[1,1,1,1],padding='SAME')+b1_1)
        conv1_2 = tf.nn.relu(tf.nn.conv2d(conv1_1,w1_2,[1,1,1,1],padding='SAME')+b1_2)
        pool1 = tf.nn.avg_pool(conv1_2,[1,3,3,1],[1,1,1,1],padding='SAME')

        #Block 2
        conv2_1 = tf.nn.relu(tf.nn.conv2d(pool1,w2_1,[1,1,1,1],padding='SAME')+b2_1)
        conv2_2 = tf.nn.relu(tf.nn.conv2d(conv2_1,w2_2,[1,1,1,1],padding='SAME')+b2_2)
        pool2 = tf.nn.avg_pool(conv2_2,[1,3,3,1],[1,1,1,1],padding='SAME')
        
        #Block 3
        conv3_1 = tf.nn.relu(tf.nn.conv2d(pool2,w3_1,[1,1,1,1],padding='SAME')+b3_1)
        conv3_2 = tf.nn.relu(tf.nn.conv2d(conv3_1,w3_2,[1,1,1,1],padding='SAME')+b3_2)
        conv3_3 = tf.nn.relu(tf.nn.conv2d(conv3_2,w3_3,[1,1,1,1],padding='SAME')+b3_3)
        pool3 = tf.nn.avg_pool(conv3_3,[1,3,3,1],[1,1,1,1],padding='SAME')
        
        #Block 4
        conv4_1 = tf.nn.relu(tf.nn.conv2d(pool3,w4_1,[1,1,1,1],padding='SAME')+b4_1)
        conv4_2 = tf.nn.relu(tf.nn.conv2d(conv4_1,w4_2,[1,1,1,1],padding='SAME')+b4_2)
        conv4_3 = tf.nn.relu(tf.nn.conv2d(conv4_2,w4_3,[1,1,1,1],padding='SAME')+b4_3)
        pool4 = tf.nn.avg_pool(conv4_3,[1,3,3,1],[1,1,1,1],padding='SAME')
        
        #Block 5
        conv5_1 = tf.nn.relu(tf.nn.conv2d(pool4,w5_1,[1,1,1,1],padding='SAME')+b5_1)
        conv5_2 = tf.nn.relu(tf.nn.conv2d(conv5_1,w5_2,[1,1,1,1],padding='SAME')+b5_2)
        conv5_3 = tf.nn.relu(tf.nn.conv2d(conv5_2,w5_3,[1,1,1,1],padding='SAME')+b5_3)
        pool5 = tf.nn.avg_pool(conv5_3,[1,3,3,1],[1,1,1,1],padding='SAME')
        
        # Five style layers + the content layer (conv4_2)
        return [conv1_1,conv2_1,conv3_1,conv4_1,conv5_1,conv4_2]

Gram Matrix


In [18]:
def gram_matrix(F_batch):
    """Gram matrix G = F F^T of a (1, H, W, C) feature map: G[i, j] is the
    inner product of channels i and j over all spatial positions (C x C)."""
    feats = tf.transpose(F_batch[0], perm=(2, 0, 1))          # (C, H, W)
    shp = tf.shape(feats)
    flat = tf.reshape(feats, shape=(shp[0], shp[1] * shp[2]))  # (C, H*W)
    return tf.matmul(flat, flat, transpose_b=True)

In [19]:
def getStyleCo(F_batch):
    """Normalization coefficient for one style-layer loss.

    With F_batch[0] of shape (H, W, C) this equals 1 / (4 H^2 W^2 C^2),
    i.e. the 1/(4 N^2 M^2) factor of Gatys et al. (N=C channels, M=H*W).
    """
    dims = tf.to_float(tf.shape(F_batch[0]))
    denom = 4.0 * (dims[0] ** 2) * (dims[1] * dims[2]) ** 2
    return 1.0 / denom

Train

Compute layers, set up losses, and backprop


In [24]:
# Loss weights (earlier experiments kept for reference):
# alpha = .99
# beta = 0.0001
alpha = 0.01 #content-loss weight
beta = 30000. #style-loss weight

In [25]:
with g.as_default():
    # Three forward passes through the same constant-weight VGG19; only
    # alter_img is a Variable, so only it receives gradients.
    l_target_s,l_target_c,l_alter = vgg19(style_img),vgg19(content_img),vgg19(alter_img)
    
    #Style Outputs: conv1_1 .. conv5_1 for the style target vs. the image being optimized
    l_target1,l_alter1 = l_target_s[0],l_alter[0]
    l_target2,l_alter2 = l_target_s[1],l_alter[1]
    l_target3,l_alter3 = l_target_s[2],l_alter[2]
    l_target4,l_alter4 = l_target_s[3],l_alter[3]
    l_target5,l_alter5 = l_target_s[4],l_alter[4]
    
    #Gram Matrices (channel-correlation summaries of each style layer)
    g_target1,g_alter1 = gram_matrix(l_target1),gram_matrix(l_alter1) 
    g_target2,g_alter2 = gram_matrix(l_target2),gram_matrix(l_alter2)
    g_target3,g_alter3 = gram_matrix(l_target3),gram_matrix(l_alter3)
    g_target4,g_alter4 = gram_matrix(l_target4),gram_matrix(l_alter4)
    g_target5,g_alter5 = gram_matrix(l_target5),gram_matrix(l_alter5)
    
    #Style Loss: normalized squared Gram-matrix differences per layer
    loss1 = tf.reduce_sum(tf.square(g_alter1 - g_target1))*getStyleCo(l_alter1)
    loss2 = tf.reduce_sum(tf.square(g_alter2 - g_target2))*getStyleCo(l_alter2)
    loss3 = tf.reduce_sum(tf.square(g_alter3 - g_target3))*getStyleCo(l_alter3)
    loss4 = tf.reduce_sum(tf.square(g_alter4 - g_target4))*getStyleCo(l_alter4)
    loss5 = tf.reduce_sum(tf.square(g_alter5 - g_target5))*getStyleCo(l_alter5)
    
    # Earlier experiments with fewer style layers:
#     loss_style = loss1
#     loss_style = (1.0/2.0)*(loss1+loss2)
#     loss_style = (1.0/3.0)*(loss1+loss2+loss3)
#     loss_style = (1.0/4.0)*(loss1+loss2+loss3+loss4)
    loss_style = (1.0/5.0)*(loss1+loss2+loss3+loss4+loss5)
    
    #Content Outputs: index 5 is conv4_2, the content layer
    l_content4_2,l_alter4_2 = l_target_c[5],l_alter[5]
    
    #Content Loss: squared feature distance at conv4_2
#     loss_content = 0.5*tf.reduce_sum(tf.square(l_target_c[2] - l_alter3))
    loss_content = 0.5*tf.reduce_sum(tf.square(l_content4_2 - l_alter4_2))
    
    #Total Loss: weighted blend of content and style terms
    loss = alpha*loss_content+beta*loss_style
    
    # Adam directly on the image pixels; lr=5.0 — large, but the "parameters"
    # here are pixel intensities in roughly [-128, 128].
    opt = tf.train.AdamOptimizer(learning_rate=.5e1,beta1=.9,beta2=.999,epsilon=1e-08).minimize(loss)
#     opt = tf.train.AdamOptimizer(learning_rate=.5e0,beta1=.9,beta2=.999,epsilon=1e-08).minimize(loss)
    
    init = tf.global_variables_initializer()

In [26]:
# Start a session on the graph, initialize alter_img (and Adam slots),
# and reset the loss history used for plotting.
sess = tf.Session(graph=g)
sess.run(init)
losses=[]

1: 295722.69

2: 13813139.0

3: 13334747.0

4: 13579524.0

5: 10864159.0


In [27]:
#sess.run(loss_style)

In [ ]:
# Make sure the snapshot directory exists; Image.save would otherwise raise
# IOError on the first checkpoint.  (`os` is imported in the imports cell.)
if not os.path.isdir('code_pic2'):
    os.makedirs('code_pic2')
num_steps = 2000
for i in tqdm(range(num_steps)):
    # One Adam step on the image; keep the total loss for plotting.
    l,_=sess.run([loss,opt])
    losses.append(l)
    if i %10 == 0:
        # Snapshot the current image every 10 steps.
        after = sess.run(alter_img)
        after = post_process(after[0])
        # // keeps the filename index an integer on both Python 2 and 3
        # (plain / would produce names like 'img1.1.png' on Python 3).
        Image.fromarray(np.uint8(np.clip(after,0,255))).save('code_pic2/img'+str(len(losses)//10)+'.png')
after = sess.run(alter_img)


  0%|          | 7/2000 [01:35<6:30:12, 11.75s/it]

In [ ]:
# Inspect the two (unweighted) loss components at the current iterate
# (Python 2 print statements).
lc,ls=sess.run([loss_content,loss_style])
print 'loss content: ',lc
print 'loss style: ',ls

In [ ]:
# Ratio of content to style loss — a quick diagnostic for tuning alpha/beta.
lc/ls

Postprocess image


In [ ]:
# Undo the VGG preprocessing (add means back, BGR -> RGB) and display the
# final stylized image.
after = post_process(after[0])
Image.fromarray(np.uint8(np.clip(after,0,255)))

In [ ]:
# Training-loss curve over iterations.
plt.plot(losses)

In [ ]:


In [ ]: